Loading packages and subroutines

Package

require(DT) # for databale
require(reshape2) # for dcast
require(ggplot2)
require(ggthemes)
require(ggpubr) # for stat_compare_means

Import table

ImportTable <- function(file, header = T, row.names = 1, sep = ',', check.names = FALSE, ...){
  data <- read.csv(file = file, header = header, row.names = row.names, sep = sep, check.names = check.names, ...)
  return(data)
}

Show_table

Show_table <- function(df, rownames = T, filter="top", options = list(pageLength = 10, scrollX=T), ...){
  if (ncol(df) > 100) {
    df <- df[, 1:100]
    message('Due to the column dim is > 100, it only shows the top 100 columns')
  }
  datatable(df, rownames = rownames, 
            filter=filter, options = options, ... )

}

FileCreate

FileCreate <- function(DirPath = "./",Prefix = "ExampleTest", Suffix = "pdf", version = "0.0.0"){
  date=as.character(Sys.Date())
  DirPath = gsub("/$","",DirPath)
  Suffix = gsub('^[.]',"",Suffix)
  if(! dir.exists(DirPath)){
    dir.create(DirPath,recursive =TRUE)
  }
  if (version == "0.0.0") {
    version=100
    while(TRUE){
      version_=paste(unlist(strsplit(as.character(version),"")),collapse=".")
      DirPathName = paste0(DirPath,"/",date,'-',Prefix,'-v',version_,".",Suffix)
      if(! file.exists(DirPathName)){
        return(DirPathName)
        break
      }
      version = version + 1
    }
  }else{
    DirPathName = paste0(DirPath,"/",date,'-',Prefix,'-v',version_,".",Suffix)
    if(! dir.exists(DirPathName)){
      return(DirPathName)
    }
    return(DirPathName)
  }
}

Import data

Custom feature

custom_feature <- c('Aspergillus rambellii', 'Moniliophthora perniciosa', 
                    'Trichophyton mentagrophytes', 'Aspergillus kawachii', 
                    'Aspergillus ochraceoroseus')

Import raw-relative abundance matrix

otu_df <- ImportTable(file = '../05.Normalized/Rarefy_1329_fungi/2021-07-26-RelativeAbundance_matrix-v1.0.csv') %>%
  t() %>% as.data.frame()

Show_table(otu_df)%>%
  formatSignif(columns = colnames(otu_df)[1:100],
               digits = 3, interval = 1)
## Due to the column dim is > 100, it only shows the top 100 columns

Import modified-relative abundance matrix (remove zero)

otu_mod_df <- ImportTable(file = '../07.FeatureSelection/01.SSTF/2021-07-29-modify_martix_norm-ALL-v1.0.0.csv')

Show_table(otu_mod_df)%>%
  formatSignif(columns = colnames(otu_mod_df)[1:100],
               digits = 3, interval = 1)
## Due to the column dim is > 100, it only shows the top 100 columns

Import median-relative abundance matrix

otu_med_df <- ImportTable(file = '../07.FeatureSelection/02.RelMedianAbundance/2021-07-30-normalized-0.1%-RelMedianAbundance-matrix-v1.0.0.csv')

Show_table(otu_med_df)%>%
  formatSignif(columns = colnames(otu_med_df)[1:100],
               digits = 3, interval = 1)
## Due to the column dim is > 100, it only shows the top 100 columns

Import meta data

meta_df <- ImportTable(file = '../00.RawData/metaData/2021-07-26-metaInfo-subgroup_1329-v6.0.csv')
meta_df$Stage <- factor(meta_df$Stage, c('CTRL', 'adenoma', 'CRC'))
Show_table(meta_df)

Barplot in each cohort

raw relative abundance matrix

# combine_df <- as.data.frame(cbind(meta_df, otu_mod_df[, custom_feature]))
combine_df <- as.data.frame(cbind(meta_df, otu_df[, custom_feature] + 0.00001))

boxplot_pdf <- FileCreate(DirPath = '../10.SelFeatureAnalysi', 
                          Prefix = 'Boxplot-relAbun_norm',
                          Suffix = 'pdf')
pdf(file = boxplot_pdf, width = 13, height = 5)
for (cf in custom_feature) {
  g_bar <- ggplot(data = combine_df, mapping = aes(x = Stage, y = .data[[cf]])) +
    geom_violin(aes(fill = Stage), scale = 'width', trim = F)+
    geom_boxplot(outlier.colour = NA, width = 0.2)+
    facet_wrap(~Cohort, nrow = 1) + 
    theme_calc()+
    scale_fill_manual(values = c(CTRL = '#006f3c',
                                  adenoma = '#f9a73e', 
                                  CRC = '#bf212f'))+
    scale_y_log10()+
    stat_compare_means(comparisons = list(c('CTRL', 'CRC'),
                                          c('adenoma', 'CRC')))+
     theme(axis.text.x=element_text(angle=90, hjust=1))
  plot(g_bar)
}
dev.off()
## png 
##   2

zero modified matrix

# combine_df <- as.data.frame(cbind(meta_df, otu_mod_df[, custom_feature]))
combine_df <- as.data.frame(cbind(meta_df, otu_mod_df[, custom_feature]))
boxplot_pdf <- FileCreate(DirPath = '../10.SelFeatureAnalysi', 
                          Prefix = 'Boxplot-modified_zero_norm',
                          Suffix = 'pdf')
pdf(file = boxplot_pdf, width = 13, height = 5)
for (cf in custom_feature) {
  g_bar <- ggplot(data = combine_df, mapping = aes(x = Stage, y = .data[[cf]])) +
    geom_violin(aes(fill = Stage), scale = 'width', trim = F)+
    geom_boxplot(outlier.colour = NA, width = 0.2)+
    # geom_boxplot(aes(col = Stage), outlier.colour = NA)+
    facet_wrap(~Cohort, nrow = 1) + 
    theme_calc()+
    scale_fill_manual(values = c(CTRL = '#006f3c',
    # scale_color_manual(values = c(CTRL = '#006f3c', 
                                  adenoma = '#f9a73e', 
                                  CRC = '#bf212f'))+
    scale_y_log10()+
    # scale_y_log10(limits = c(min(combine_df[[cf]])/10, max(combine_df[[cf]])*500))+
    stat_compare_means(comparisons = list(c('CTRL', 'CRC'),
                                          c('adenoma', 'CRC')))+
     theme(axis.text.x=element_text(angle=90, hjust=1))
  plot(g_bar)
}
dev.off()
## png 
##   2

median modified matrix

# combine_df <- as.data.frame(cbind(meta_df, otu_mod_df[, custom_feature]))
combine_df <- as.data.frame(cbind(meta_df, otu_med_df[, custom_feature]))
boxplot_pdf <- FileCreate(DirPath = '../10.SelFeatureAnalysi', 
                          Prefix = 'Boxplot-median_norm',
                          Suffix = 'pdf')
pdf(file = boxplot_pdf, width = 13, height = 5)
for (cf in custom_feature) {
  g_bar <- ggplot(data = combine_df, mapping = aes(x = Stage, y = .data[[cf]])) +
    geom_violin(aes(fill = Stage), scale = 'width', trim = F)+
    geom_boxplot(outlier.colour = NA, width = 0.2)+
    # geom_boxplot(aes(col = Stage), outlier.colour = NA)+
    facet_wrap(~Cohort, nrow = 1) + 
    theme_calc()+
    scale_fill_manual(values = c(CTRL = '#006f3c',
    # scale_color_manual(values = c(CTRL = '#006f3c', 
                                  adenoma = '#f9a73e', 
                                  CRC = '#bf212f'))+
    scale_y_log10()+
    # scale_y_log10(limits = c(min(combine_df[[cf]])/10, max(combine_df[[cf]])*500))+
    stat_compare_means(comparisons = list(c('CTRL', 'CRC'),
                                          c('adenoma', 'CRC')))+
     theme(axis.text.x=element_text(angle=90, hjust=1))
  plot(g_bar)
}
dev.off()
## png 
##   2